import random
import time
from lxml import etree
import requests

# Chapter page to download and parse.
url = "https://www.2biqu.com/biqu2648/2719657.html"

# Send a desktop-browser User-Agent instead of the default requests one.
# NOTE(review): presumably the site rejects or alters responses for
# non-browser clients - confirm.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
}

# requests applies no timeout by default, so an unresponsive server would
# hang the script forever; bound the wait explicitly.
req = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as chapter text.
req.raise_for_status()
# Auto-detect the encoding from the body; detection may occasionally be wrong.
req.encoding = req.apparent_encoding
# print(req.text)
tree = etree.HTML(req.text)

# Extract the title, e.g. <h1 class="title"> 第三章 三戏亢海蛟（下）</h1>
title = tree.xpath('//h1[@class="title"]/text()')
print(title)

# Direct text nodes of the element with id="content" (the chapter body).
con = tree.xpath('//*[@id="content"]/text()')
# print(con)

# Drop non-breaking spaces and carriage returns from every text node in a
# single pass per string.
_strip_table = str.maketrans('', '', '\xa0\r')
con_cleaned = [text.translate(_strip_table) for text in con]
print(con_cleaned)


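# NOTE: disabled loop that fetches chapters from shicimingju.com. It relies on
# title_list / href_list, which are not defined in the visible part of this file.
# for i in range(len(title_list)):
#     # print(title_list[i])
#     # Printed directly, the href is /book/sanguoyanyi/32.html, which is incomplete
#     # print(href_list[i])
#     # Build the full URL
#     url = "https://www.shicimingju.com" + href_list[i]
#     # print(url)
#     req = requests.get(url, headers=headers)
#     req.encoding = req.apparent_encoding
#     tree = etree.HTML(req.text)
#     # Content of the corresponding chapter
#     con = tree.xpath('//div[@class="card bookmark-list"]//text()')
#     print(title_list[i])
#     print(con)
#     time.sleep(random.randint(1, 3))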
